/*
 * The MIT License
 *
 * Copyright (c) 2019 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package picard.arrays;

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.arrays.illumina.IlluminaAdpcFileWriter;
import picard.arrays.illumina.IlluminaGenotype;
import picard.arrays.illumina.InfiniumVcfFields;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * A simple program to convert a Genotyping Arrays VCF to an ADPC file (Illumina intensity data file).
 *
 */
@CommandLineProgramProperties(
        summary = VcfToAdpc.USAGE_DETAILS,
        oneLineSummary = "Program to convert an Arrays VCF to an ADPC file.",
        programGroup = picard.cmdline.programgroups.GenotypingArraysProgramGroup.class
)
@DocumentedFeature
public class VcfToAdpc extends CommandLineProgram {
    static final String USAGE_DETAILS =
            "VcfToAdpc takes a VCF, as generated by GtcToVcf and generates an Illumina 'adpc.bin' file from it. " +
                    "An adpc.bin file is a binary file containing genotyping array intensity data that can be exported " +
                    "by Illumina's GenomeStudio and Beadstudio analysis tools. The adpc.bin file is used as an input to " +
                    "<a href='https://genome.sph.umich.edu/wiki/VerifyIDintensity'>VerifyIDintensity</a> a tool for " +
                    "detecting and estimating sample contamination of Illumina genotyping array data. " +
                    "If more than one VCF is used, they must all have the same number of loci." +
                    "<h4>Usage example:</h4>" +
                    "<pre>" +
                    "java -jar picard.jar VcfToAdpc \\<br />" +
                    "      VCF=input.vcf \\<br />" +
                    "      OUTPUT=output.adpc.bin \\<br />" +
                    "      SAMPLES_FILE=output.samples.txt \\<br />" +
                    "      NUM_MARKERS_FILE=output.num_markers.txt \\<br />" +
                    "</pre>";


    private final Log log = Log.getInstance(VcfToAdpc.class);

    @Argument(doc = "One or more VCF files containing array intensity data.")
    public List<File> VCF;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output (adpc.bin) file to write.")
    public File OUTPUT;

    @Argument(shortName = "SF", doc = "A text file into which the names of the samples will be written. " +
            "These will be in the same order as the data in the adpc.bin file.")
    public File SAMPLES_FILE;

    @Argument(shortName = "NMF", doc = "A text file into which the number of loci in the VCF will be written. " +
            "This is useful for calling verifyIDIntensity.")
    public File NUM_MARKERS_FILE;

    @Override
    protected int doWork() {
        final List<File> inputs = IOUtil.unrollFiles(VCF, IOUtil.VCF_EXTENSIONS);
        IOUtil.assertFilesAreReadable(inputs);
        IOUtil.assertFileIsWritable(SAMPLES_FILE);
        IOUtil.assertFileIsWritable(NUM_MARKERS_FILE);
        IOUtil.assertFileIsWritable(OUTPUT);
        final List<String> sampleNames = new ArrayList<>();

        Integer numberOfLoci = null;
        try (IlluminaAdpcFileWriter adpcFileWriter = new IlluminaAdpcFileWriter(OUTPUT)) {
            for (final File inputVcf : inputs) {
                VCFFileReader vcfFileReader = new VCFFileReader(inputVcf, false);
                final VCFHeader header = vcfFileReader.getFileHeader();
                for (int sampleNumber = 0; sampleNumber < header.getNGenotypeSamples(); sampleNumber++) {
                    final String sampleName = header.getGenotypeSamples().get(sampleNumber);
                    sampleNames.add(sampleName);
                    log.info("Processing sample: " + sampleName + " from VCF: " + inputVcf.getAbsolutePath());

                    CloseableIterator<VariantContext> variants = vcfFileReader.iterator();
                    int lociCount = 0;
                    while (variants.hasNext()) {
                        final VariantContext context = variants.next();
                        final float gcScore = getFloatAttribute(context, InfiniumVcfFields.GC_SCORE);

                        final Genotype genotype = context.getGenotype(sampleNumber);
                        final IlluminaGenotype illuminaGenotype = getIlluminaGenotype(genotype, context);

                        final int rawXIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.X);
                        final int rawYIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.Y);

                        final Float normalizedXIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMX);
                        final Float normalizedYIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMY);

                        final IlluminaAdpcFileWriter.Record record = new IlluminaAdpcFileWriter.Record(rawXIntensity, rawYIntensity, normalizedXIntensity, normalizedYIntensity, gcScore, illuminaGenotype);
                        adpcFileWriter.write(record);
                        lociCount++;
                    }
                    if (lociCount == 0) {
                        throw new PicardException("Found no records in VCF' " + inputVcf.getAbsolutePath() + "'");
                    }
                    if (numberOfLoci == null) {
                        numberOfLoci = lociCount;
                    } else {
                        if (lociCount != numberOfLoci) {
                            throw new PicardException("VCFs have differing number of loci");
                        }
                    }
                }
            }
            writeTextToFile(SAMPLES_FILE, StringUtils.join(sampleNames, "\n"));
            writeTextToFile(NUM_MARKERS_FILE, "" + numberOfLoci);
        } catch (Exception e) {
            log.error(e);
            return 1;
        }

        return 0;
    }

    private void writeTextToFile(final File output, final String text) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(output), StandardCharsets.UTF_8))) {
            writer.write(text);
        }
    }

    private IlluminaGenotype getIlluminaGenotype(final Genotype genotype, final VariantContext context) {
        final IlluminaGenotype illuminaGenotype;
        if (genotype.isCalled()) {
            // Note that we remove the trailing '*' that appears in alleles that are reference.
            final String illuminaAlleleA = StringUtils.stripEnd(getStringAttribute(context, InfiniumVcfFields.ALLELE_A), "*");
            final String illuminaAlleleB = StringUtils.stripEnd(getStringAttribute(context, InfiniumVcfFields.ALLELE_B), "*");
            if (genotype.getAlleles().size() != 2) {
                throw new PicardException("Unexpected number of called alleles in variant context " + context + " found alleles: " + genotype.getAlleles());
            }
            final Allele calledAllele1 = genotype.getAllele(0);
            final Allele calledAllele2 = genotype.getAllele(1);

            if (calledAllele1.basesMatch(illuminaAlleleA)) {
                if (calledAllele2.basesMatch(illuminaAlleleA)) {
                    illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.AA;
                } else if (calledAllele2.basesMatch(illuminaAlleleB)) {
                    illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.AB;
                } else {
                    throw new PicardException("Error matching called alleles to Illumina alleles.  Context: " + context);
                }
            } else if (calledAllele1.basesMatch(illuminaAlleleB)) {
                if (calledAllele2.basesMatch(illuminaAlleleA)) {
                    illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.AB;
                } else if (calledAllele2.basesMatch(illuminaAlleleB)) {
                    illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.BB;
                } else {
                    throw new PicardException("Error matching called alleles to Illumina alleles.  Context: " + context);
                }
            } else {
                // We didn't match up the illumina alleles to called alleles
                throw new PicardException("Error matching called alleles to Illumina alleles.  Context: " + context);
            }
        } else {
            illuminaGenotype = picard.arrays.illumina.IlluminaGenotype.NN;
        }
        return illuminaGenotype;
    }

    private int getUnsignedShortAttributeAsInt(final Genotype genotype, final String key) {
        final int attributeAsInt = Integer.parseInt(getRequiredAttribute(genotype, key).toString());
        if (attributeAsInt < 0) {
            throw new PicardException("Value for key " + key + " (" + attributeAsInt + ") is <= 0!  Invalid value for unsigned int");
        }
        if (attributeAsInt > picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT) {
            log.warn("Value for key " + key + " (" + attributeAsInt + ") is > " + picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT + " (truncating it)");
            return picard.arrays.illumina.InfiniumDataFile.MAX_UNSIGNED_SHORT;
        }
        return attributeAsInt;
    }

    private Float getFloatAttribute(final Genotype genotype, final String key) {
        final Object value = genotype.getAnyAttribute(key);
        if (value != null) {
            return Float.parseFloat(value.toString());
        }
        return null;
    }

    private Object getRequiredAttribute(Genotype genotype, final String key) {
        final Object value = genotype.getAnyAttribute(key);
        if (value == null) {
            throw new PicardException("Unable to find attribute " + key + " in VCF Genotype field.  Is this an Arrays VCF file?");
        }
        return value;
    }

    private float getFloatAttribute(final VariantContext context, final String key) {
        return Float.parseFloat(getRequiredAttribute(context, key).toString());
    }

    private String getStringAttribute(final VariantContext context, final String key) {
        return getRequiredAttribute(context, key).toString();
    }

    private Object getRequiredAttribute(final VariantContext context, final String key) {
        final Object value = context.getAttribute(key);
        if (value == null) {
            throw new PicardException("Unable to find attribute " + key + " in VCF.  Is this an Arrays VCF file?");
        }
        return value;
    }
}
